mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-16 16:30:15 +01:00
📚 feat: Add Source Citations for File Search in Agents (#8652)
* feat: Source Citations for file_search in Agents * Fix: Added citation limits and relevance score to app service. Removed duplicate tests * ✨ feat: implement Role-level toggle to optionally disable file Source Citation in Agents * 🐛 fix: update mock for librechat-data-provider to include PermissionTypes and SystemRoles --------- Co-authored-by: Praneeth <praneeth.goparaju@slalom.com>
This commit is contained in:
parent
a955097faf
commit
52e59e40be
36 changed files with 1890 additions and 190 deletions
266
api/app/clients/agents/processAgentResponse.js
Normal file
266
api/app/clients/agents/processAgentResponse.js
Normal file
|
|
@ -0,0 +1,266 @@
|
|||
const { Files } = require('~/models');
|
||||
const { getCustomConfig } = require('~/server/services/Config/getCustomConfig');
|
||||
const { nanoid } = require('nanoid');
|
||||
const { Tools, PermissionTypes, Permissions } = require('librechat-data-provider');
|
||||
const { logger } = require('~/config');
|
||||
const { checkAccess } = require('@librechat/api');
|
||||
const { getRoleByName } = require('~/models/Role');
|
||||
|
||||
/**
 * Attaches file-search source citations to an agent response.
 *
 * Parses the `file_search` tool output found in `contentParts`, drops results
 * below the configured relevance threshold, caps the number of citations
 * (overall and per file) and appends one `file_search` attachment holding the
 * resulting sources to `response.attachments`.
 *
 * This function does not throw: any failure is logged and the response is
 * returned without citations.
 *
 * @param {object} response - Response being built; must carry a `messageId`.
 *   Mutated in place when citations are attached.
 * @param {string} userId - ID of the requesting user (used in log messages only).
 * @param {string} conversationId - Conversation the attachment belongs to.
 * @param {Array<object>} [contentParts=[]] - Content parts that may carry
 *   `file_search` tool output.
 * @param {object|null} [user=null] - User object for the FILE_CITATIONS
 *   permission check; the check is skipped entirely when omitted.
 * @returns {Promise<object>} The same `response` object, with at most one
 *   additional attachment.
 */
const processAgentResponse = async (
  response,
  userId,
  conversationId,
  contentParts = [],
  user = null,
) => {
  try {
    if (!response.messageId) {
      logger.warn('[processAgentResponse] No messageId in response');
      return response;
    }

    // Role-level toggle: users without FILE_CITATIONS get the plain response.
    if (user) {
      try {
        const hasFileCitationsAccess = await checkAccess({
          user,
          permissionType: PermissionTypes.FILE_CITATIONS,
          permissions: [Permissions.USE],
          getRoleByName,
        });

        if (!hasFileCitationsAccess) {
          logger.debug(
            `[processAgentResponse] User ${userId} does not have FILE_CITATIONS permission`,
          );
          return response; // Return response without file citations
        }

        logger.debug(
          `[processAgentResponse] FILE_CITATIONS permission verified for user ${userId}`,
        );
      } catch (error) {
        logger.error(
          `[processAgentResponse] Permission check failed for FILE_CITATIONS: ${error.message}`,
        );
        // Fail open for permission errors to avoid breaking existing functionality
        logger.debug(`[processAgentResponse] Proceeding with citations due to permission error`);
      }
    }

    logger.debug(`[processAgentResponse] Processing citations for user ${userId}`);

    // Extract first: most agent responses never used file_search, so this is the
    // common exit. It is logged at debug level (not warn) and happens before the
    // custom config is loaded.
    const fileSearchResults = extractFileResults(contentParts);
    if (!fileSearchResults.length) {
      logger.debug('[processAgentResponse] No file search results found');
      return response;
    }

    const customConfig = await getCustomConfig();
    const agentsConfig = customConfig?.endpoints?.agents;
    const maxCitations = agentsConfig?.maxCitations ?? 30;
    const maxCitationsPerFile = agentsConfig?.maxCitationsPerFile ?? 5;
    const minRelevanceScore = agentsConfig?.minRelevanceScore ?? 0.45;

    // Filter results based on relevance score cutoff
    const filteredResults = fileSearchResults.filter(
      (result) => result.relevance >= minRelevanceScore,
    );

    const filteredCount = fileSearchResults.length - filteredResults.length;
    if (filteredCount > 0) {
      logger.debug(
        `[processAgentResponse] Filtered out ${filteredCount} sources below relevance threshold of ${minRelevanceScore}`,
      );
    }

    if (filteredResults.length === 0) {
      logger.debug(
        `[processAgentResponse] No results above relevance threshold of ${minRelevanceScore} (filtered ${fileSearchResults.length} total results)`,
      );
      return response;
    }

    const selectedResults = selectBestResults(filteredResults, maxCitations, maxCitationsPerFile);
    const sources = await createSourcesWithMetadata(selectedResults, customConfig);

    if (sources.length > 0) {
      logger.debug(
        '[processAgentResponse] Creating file search attachment with sources:',
        sources.length,
      );

      const fileSearchAttachment = {
        messageId: response.messageId,
        toolCallId: 'file_search_results',
        conversationId,
        name: `${Tools.file_search}_file_search_results_${nanoid()}`,
        type: Tools.file_search,
        [Tools.file_search]: { sources },
      };

      response.attachments = response.attachments || [];
      response.attachments.push(fileSearchAttachment);
    }

    return response;
  } catch (error) {
    // Citations are best-effort: never let them break the agent response.
    logger.error('[processAgentResponse] Error processing agent response:', error);
    return response;
  }
};
|
||||
|
||||
/**
 * Collects parsed file-search results from a list of content parts.
 *
 * A part contributes when it is either:
 *  - a `tool_call` part whose tool name is `file_search` (its `tool_result`,
 *    or else `tool_call.output`, is parsed), or
 *  - a `tool_result` / `tool_call` part whose string `tool_result` contains
 *    `File:`, or
 *  - any part whose string `content` contains `File:`.
 *
 * Missing entries (`null` / `undefined`, e.g. holes in a sparse array) are
 * skipped instead of throwing, and a non-array argument yields no results.
 *
 * @param {Array<object>} contentParts - Content parts to scan.
 * @returns {Array<object>} Flattened results from `parseFileSearchResults`.
 */
const extractFileResults = (contentParts) => {
  const results = [];

  if (!Array.isArray(contentParts)) {
    return results;
  }

  for (const part of contentParts) {
    // Guard against empty slots so one bad entry cannot discard every citation.
    if (part == null) {
      continue;
    }

    let toolResult = null;

    if (part.type === 'tool_call' && part.tool_call?.name === 'file_search') {
      toolResult = part.tool_result || part.tool_call?.output;
    } else if (
      (part.type === 'tool_result' || part.type === 'tool_call') &&
      part.tool_result &&
      typeof part.tool_result === 'string' &&
      part.tool_result.includes('File:')
    ) {
      toolResult = part.tool_result;
    } else if (part.content && typeof part.content === 'string' && part.content.includes('File:')) {
      toolResult = part.content;
    }

    if (toolResult) {
      results.push(...parseFileSearchResults(toolResult));
    }
  }

  return results;
};
|
||||
|
||||
/**
 * Picks the citations to show: the most relevant results overall, while
 * limiting how many may come from any single file.
 *
 * Results are grouped by `file_id`, each group keeps its
 * `maxCitationsPerFile` highest-relevance entries, and the survivors are
 * sorted by descending relevance and truncated to `maxCitations`.
 *
 * Grouping uses a `Map` rather than a plain object so that file IDs which
 * collide with `Object.prototype` members (e.g. `constructor`, `__proto__`)
 * are handled like any other ID instead of throwing.
 *
 * @param {Array<{file_id: string, relevance: number}>} results - Candidate results.
 * @param {number} maxCitations - Maximum number of results returned overall.
 * @param {number} [maxCitationsPerFile=5] - Maximum number of results kept per file.
 * @returns {Array<object>} Selected results, most relevant first.
 */
const selectBestResults = (results, maxCitations, maxCitationsPerFile = 5) => {
  const byRelevanceDesc = (a, b) => b.relevance - a.relevance;

  const byFile = new Map();
  for (const result of results) {
    // String() keeps the grouping identical to property-key coercion.
    const key = String(result.file_id);
    const group = byFile.get(key);
    if (group) {
      group.push(result);
    } else {
      byFile.set(key, [result]);
    }
  }

  const representatives = [];
  for (const group of byFile.values()) {
    // Take up to maxCitationsPerFile results per file instead of just one
    representatives.push(...group.sort(byRelevanceDesc).slice(0, maxCitationsPerFile));
  }

  return representatives.sort(byRelevanceDesc).slice(0, maxCitations);
};
|
||||
|
||||
/**
 * Converts selected search results into citation source objects, enriching
 * them with stored file metadata where available.
 *
 * File records are looked up once for all distinct `file_id`s. A failed
 * lookup is logged and does not abort: sources are still produced from the
 * parsed result data alone.
 *
 * @param {Array<object>} results - Selected results (`file_id`, `filename`,
 *   `relevance`, `page`, `pageRelevance`).
 * @param {object} [customConfig] - Custom config; `fileStrategy` is the
 *   storage-type fallback when the file record has no `source`.
 * @returns {Promise<Array<object>>} One source per result, in input order.
 */
const createSourcesWithMetadata = async (results, customConfig) => {
  // Nothing to cite: skip the database round trip.
  if (!results.length) {
    return [];
  }

  const fileIds = [...new Set(results.map((result) => result.file_id))];

  // Map (not a plain object) so arbitrary file IDs cannot hit prototype members.
  const fileRecords = new Map();
  try {
    const files = await Files.find({ file_id: { $in: fileIds } });
    for (const file of files) {
      fileRecords.set(file.file_id, file);
    }
  } catch (error) {
    logger.error('[processAgentResponse] Error looking up file metadata:', error);
  }

  return results.map((result) => {
    const fileRecord = fileRecords.get(result.file_id);
    const configuredStorageType = fileRecord?.source || customConfig?.fileStrategy || 'local';

    return {
      fileId: result.file_id,
      // Prefer the stored filename; fall back to the name parsed from the tool
      // output so a missed/failed lookup does not label every source "Unknown File".
      fileName: fileRecord?.filename || result.filename || 'Unknown File',
      pages: result.page ? [result.page] : [],
      relevance: result.relevance,
      type: 'file',
      pageRelevance: result.pageRelevance || {},
      metadata: { storageType: configuredStorageType },
    };
  });
};
|
||||
|
||||
/**
 * Parses the text produced by the file_search tool into result records.
 *
 * When the text contains an
 * `<!-- INTERNAL_DATA_START --> ... <!-- INTERNAL_DATA_END -->` section, only
 * that section is parsed; otherwise the whole text is. The text is split into
 * sections on blank lines or `---` separators, and each section is scanned
 * for `File: `, `File_ID: `, `Relevance: `, `Page: ` and `Content: ` lines.
 *
 * A section yields a record only if it has a filename and either a positive
 * relevance or a file ID. A missing file ID is derived from the filename and
 * a missing/zero relevance defaults to 0.5.
 *
 * @param {string} formattedResults - Raw tool output. Non-string input yields `[]`.
 * @returns {Array<{file_id: string, filename: string, relevance: number,
 *   content: string, page: (number|null), pageRelevance: object}>}
 */
const parseFileSearchResults = (formattedResults) => {
  const results = [];

  // Tool output may be absent or structured; only text can be parsed.
  if (typeof formattedResults !== 'string') {
    return results;
  }

  try {
    let dataToProcess = formattedResults;
    const internalDataMatch = formattedResults.match(
      /<!-- INTERNAL_DATA_START -->\n(.*?)\n<!-- INTERNAL_DATA_END -->/s,
    );
    if (internalDataMatch) {
      dataToProcess = internalDataMatch[1];
    }

    const sections = dataToProcess.split(/\n\s*\n|\n---\n/);

    for (const section of sections) {
      if (!section.trim()) {
        continue;
      }

      const lines = section.trim().split('\n');
      let filename = '';
      let file_id = '';
      let relevance = 0;
      let content = '';
      let page = null;

      for (const line of lines) {
        const trimmedLine = line.trim();
        if (trimmedLine.startsWith('File: ')) {
          filename = trimmedLine.replace('File: ', '').trim();
        } else if (trimmedLine.startsWith('File_ID: ')) {
          file_id = trimmedLine.replace('File_ID: ', '').trim();
        } else if (trimmedLine.startsWith('Relevance: ')) {
          relevance = parseFloat(trimmedLine.replace('Relevance: ', '').trim()) || 0;
        } else if (trimmedLine.startsWith('Page: ')) {
          // Explicit radix; anything non-numeric (including "N/A") means "no page"
          // rather than leaking NaN into the result.
          const parsedPage = Number.parseInt(trimmedLine.replace('Page: ', '').trim(), 10);
          page = Number.isNaN(parsedPage) ? null : parsedPage;
        } else if (trimmedLine.startsWith('Content: ')) {
          content = trimmedLine.replace('Content: ', '').trim();
        }
      }

      if (filename && (relevance > 0 || file_id)) {
        const finalFileId = file_id || filename.replace(/[^a-zA-Z0-9]/g, '_').toLowerCase();
        results.push({
          file_id: finalFileId,
          filename,
          relevance: relevance || 0.5,
          content,
          page,
          pageRelevance: page ? { [page]: relevance || 0.5 } : {},
        });
      }
    }
  } catch (error) {
    logger.error('[parseFileSearchResults] Error parsing results:', error);
  }

  return results;
};
|
||||
|
||||
module.exports = {
|
||||
processAgentResponse,
|
||||
};
|
||||
|
|
@ -114,11 +114,13 @@ const createFileSearchTool = async ({ req, files, entity_id }) => {
|
|||
}
|
||||
|
||||
const formattedResults = validResults
|
||||
.flatMap((result) =>
|
||||
.flatMap((result, fileIndex) =>
|
||||
result.data.map(([docInfo, distance]) => ({
|
||||
filename: docInfo.metadata.source.split('/').pop(),
|
||||
content: docInfo.page_content,
|
||||
distance,
|
||||
file_id: files[fileIndex]?.file_id,
|
||||
page: docInfo.metadata.page || null,
|
||||
})),
|
||||
)
|
||||
// TODO: results should be sorted by relevance, not distance
|
||||
|
|
@ -128,18 +130,34 @@ const createFileSearchTool = async ({ req, files, entity_id }) => {
|
|||
|
||||
const formattedString = formattedResults
|
||||
.map(
|
||||
(result) =>
|
||||
`File: ${result.filename}\nRelevance: ${1.0 - result.distance.toFixed(4)}\nContent: ${
|
||||
(result, index) =>
|
||||
`File: ${result.filename}\nAnchor: \\ue202turn0file${index} (${result.filename})\nRelevance: ${(1.0 - result.distance).toFixed(4)}\nContent: ${
|
||||
result.content
|
||||
}\n`,
|
||||
)
|
||||
.join('\n---\n');
|
||||
|
||||
return formattedString;
|
||||
// Add hidden file_id data for processAgentResponse parsing
|
||||
const internalData = formattedResults
|
||||
.map(
|
||||
(result) =>
|
||||
`File: ${result.filename}\nFile_ID: ${result.file_id}\nRelevance: ${(1.0 - result.distance).toFixed(4)}\nPage: ${result.page || 'N/A'}\nContent: ${result.content}\n`,
|
||||
)
|
||||
.join('\n---\n');
|
||||
|
||||
return `${formattedString}\n\n<!-- INTERNAL_DATA_START -->\n${internalData}\n<!-- INTERNAL_DATA_END -->`;
|
||||
},
|
||||
{
|
||||
name: Tools.file_search,
|
||||
description: `Performs semantic search across attached "${Tools.file_search}" documents using natural language queries. This tool analyzes the content of uploaded files to find relevant information, quotes, and passages that best match your query. Use this to extract specific information or find relevant sections within the available documents.`,
|
||||
description: `Performs semantic search across attached "${Tools.file_search}" documents using natural language queries. This tool analyzes the content of uploaded files to find relevant information, quotes, and passages that best match your query. Use this to extract specific information or find relevant sections within the available documents.
|
||||
|
||||
**CITE FILE SEARCH RESULTS:**
|
||||
Use anchor markers immediately after statements derived from file content. Reference the filename in your text:
|
||||
- File citation: "The document.pdf states that... \\ue202turn0file0"
|
||||
- Page reference: "According to report.docx... \\ue202turn0file1"
|
||||
- Multi-file: "Multiple sources confirm... \\ue200\\ue202turn0file0\\ue202turn0file1\\ue201"
|
||||
|
||||
**ALWAYS mention the filename in your text before the citation marker. NEVER use markdown links or footnotes.**`,
|
||||
schema: z.object({
|
||||
query: z
|
||||
.string()
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ const {
|
|||
} = require('./Message');
|
||||
const { getConvoTitle, getConvo, saveConvo, deleteConvos } = require('./Conversation');
|
||||
const { getPreset, getPresets, savePreset, deletePresets } = require('./Preset');
|
||||
const { File } = require('~/db/models');
|
||||
|
||||
module.exports = {
|
||||
...methods,
|
||||
|
|
@ -51,4 +52,6 @@ module.exports = {
|
|||
getPresets,
|
||||
savePreset,
|
||||
deletePresets,
|
||||
|
||||
Files: File,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -49,6 +49,7 @@ const BaseClient = require('~/app/clients/BaseClient');
|
|||
const { getRoleByName } = require('~/models/Role');
|
||||
const { loadAgent } = require('~/models/Agent');
|
||||
const { getMCPManager } = require('~/config');
|
||||
const { processAgentResponse } = require('~/app/clients/agents/processAgentResponse');
|
||||
|
||||
const omitTitleOptions = new Set([
|
||||
'stream',
|
||||
|
|
@ -838,7 +839,7 @@ class AgentClient extends BaseClient {
|
|||
|
||||
if (noSystemMessages === true && systemContent?.length) {
|
||||
const latestMessageContent = _messages.pop().content;
|
||||
if (typeof latestMessage !== 'string') {
|
||||
if (typeof latestMessageContent !== 'string') {
|
||||
latestMessageContent[0].text = [systemContent, latestMessageContent[0].text].join('\n');
|
||||
_messages.push(new HumanMessage({ content: latestMessageContent }));
|
||||
} else {
|
||||
|
|
@ -1034,6 +1035,28 @@ class AgentClient extends BaseClient {
|
|||
if (attachments && attachments.length > 0) {
|
||||
this.artifactPromises.push(...attachments);
|
||||
}
|
||||
|
||||
// Process agent response to capture file references and create attachments
|
||||
|
||||
const processedResponse = await processAgentResponse(
|
||||
{
|
||||
messageId: this.responseMessageId,
|
||||
attachments: this.artifactPromises,
|
||||
},
|
||||
this.user ?? this.options.req.user?.id,
|
||||
this.conversationId,
|
||||
this.contentParts,
|
||||
this.options.req.user,
|
||||
);
|
||||
|
||||
// Update artifact promises with any new attachments from agent response
|
||||
if (processedResponse.attachments && processedResponse.attachments.length > 0) {
|
||||
// Add new attachments to existing artifactPromises
|
||||
processedResponse.attachments.forEach((attachment) => {
|
||||
this.artifactPromises.push(Promise.resolve(attachment));
|
||||
});
|
||||
}
|
||||
|
||||
await this.recordCollectedUsage({ context: 'message' });
|
||||
} catch (err) {
|
||||
logger.error(
|
||||
|
|
|
|||
|
|
@ -25,9 +25,55 @@ const { refreshS3FileUrls } = require('~/server/services/Files/S3/crud');
|
|||
const { getProjectByName } = require('~/models/Project');
|
||||
const { getAssistant } = require('~/models/Assistant');
|
||||
const { getAgent } = require('~/models/Agent');
|
||||
const { cleanFileName } = require('~/server/utils/files');
|
||||
const { getLogStores } = require('~/cache');
|
||||
const { logger } = require('~/config');
|
||||
|
||||
/**
 * Decides whether a user may access a file through an agent that has the file
 * attached as a tool resource (file_search, execute_code or ocr).
 *
 * Access is granted when a matching agent is authored by the user, is marked
 * collaborative, or belongs to at least one project. Any error is logged and
 * treated as "no access".
 *
 * NOTE(review): the collaborative and project shortcuts do not look at the
 * requesting user at all, so they grant access to every caller; confirm this
 * is acceptable or tighten to real membership checks.
 * NOTE(review): `getAgent` presumably returns a single document; if so, only
 * one of the agents referencing the file is ever inspected. Verify against
 * the model.
 *
 * @param {string} userId - ID of the requesting user.
 * @param {string} fileId - ID of the file being requested.
 * @returns {Promise<boolean>} `true` when shared access is granted.
 */
const checkSharedFileAccess = async (userId, fileId) => {
  try {
    // Look for agents referencing the file in any supported tool resource.
    const fileQuery = {
      $or: [
        { 'tool_resources.file_search.file_ids': fileId },
        { 'tool_resources.execute_code.file_ids': fileId },
        { 'tool_resources.ocr.file_ids': fileId },
      ],
    };
    const lookup = await getAgent(fileQuery);

    if (!lookup || lookup.length === 0) {
      return false;
    }

    // Accept either a single agent or a list of agents.
    const candidates = Array.isArray(lookup) ? lookup : [lookup];

    return candidates.some((candidate) => {
      const isAuthor = Boolean(candidate.author) && candidate.author.toString() === userId;
      if (isAuthor) {
        return true;
      }

      if (candidate.isCollaborative) {
        return true;
      }

      // Simplified check: any project association counts, without verifying
      // that the user is actually a member of the project.
      return Boolean(candidate.projectIds) && candidate.projectIds.length > 0;
    });
  } catch (error) {
    logger.error('[checkSharedFileAccess] Error:', error);
    return false;
  }
};
|
||||
|
||||
const router = express.Router();
|
||||
|
||||
router.get('/', async (req, res) => {
|
||||
|
|
@ -308,21 +354,32 @@ router.get('/download/:userId/:file_id', async (req, res) => {
|
|||
const { userId, file_id } = req.params;
|
||||
logger.debug(`File download requested by user ${userId}: ${file_id}`);
|
||||
|
||||
if (userId !== req.user.id) {
|
||||
logger.warn(`${errorPrefix} forbidden: ${file_id}`);
|
||||
return res.status(403).send('Forbidden');
|
||||
}
|
||||
|
||||
const [file] = await getFiles({ file_id });
|
||||
const errorPrefix = `File download requested by user ${userId}`;
|
||||
const [file] = await getFiles({ file_id });
|
||||
|
||||
if (!file) {
|
||||
logger.warn(`${errorPrefix} not found: ${file_id}`);
|
||||
return res.status(404).send('File not found');
|
||||
}
|
||||
|
||||
if (!file.filepath.includes(userId)) {
|
||||
logger.warn(`${errorPrefix} forbidden: ${file_id}`);
|
||||
// Extract actual file owner from S3 filepath (e.g., /uploads/ownerId/filename)
|
||||
let actualFileOwner = userId;
|
||||
if (file.filepath && file.filepath.includes('/uploads/')) {
|
||||
const pathMatch = file.filepath.match(/\/uploads\/([^/]+)\//);
|
||||
if (pathMatch) {
|
||||
actualFileOwner = pathMatch[1];
|
||||
}
|
||||
}
|
||||
|
||||
// Check access: either own the file or have shared access through conversations
|
||||
const isFileOwner = req.user.id === actualFileOwner;
|
||||
const hasSharedAccess = !isFileOwner && (await checkSharedFileAccess(req.user.id, file_id));
|
||||
|
||||
if (!isFileOwner && !hasSharedAccess) {
|
||||
return res.status(403).send('Forbidden');
|
||||
}
|
||||
|
||||
if (isFileOwner && userId !== actualFileOwner) {
|
||||
return res.status(403).send('Forbidden');
|
||||
}
|
||||
|
||||
|
|
@ -338,7 +395,8 @@ router.get('/download/:userId/:file_id', async (req, res) => {
|
|||
}
|
||||
|
||||
const setHeaders = () => {
|
||||
res.setHeader('Content-Disposition', `attachment; filename="${file.filename}"`);
|
||||
const cleanedFilename = cleanFileName(file.filename);
|
||||
res.setHeader('Content-Disposition', `attachment; filename="${cleanedFilename}"`);
|
||||
res.setHeader('Content-Type', 'application/octet-stream');
|
||||
res.setHeader('X-File-Metadata', JSON.stringify(file));
|
||||
};
|
||||
|
|
@ -365,12 +423,17 @@ router.get('/download/:userId/:file_id', async (req, res) => {
|
|||
logger.debug(`File ${file_id} downloaded from OpenAI`);
|
||||
passThrough.body.pipe(res);
|
||||
} else {
|
||||
fileStream = getDownloadStream(file_id);
|
||||
fileStream = await getDownloadStream(req, file.filepath);
|
||||
|
||||
fileStream.on('error', (streamError) => {
|
||||
logger.error('[DOWNLOAD ROUTE] Stream error:', streamError);
|
||||
});
|
||||
|
||||
setHeaders();
|
||||
fileStream.pipe(res);
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error('Error downloading file:', error);
|
||||
logger.error('[DOWNLOAD ROUTE] Error downloading file:', error);
|
||||
res.status(500).send('Error downloading file');
|
||||
}
|
||||
});
|
||||
|
|
@ -405,7 +468,6 @@ router.post('/', async (req, res) => {
|
|||
message = error.message;
|
||||
}
|
||||
|
||||
// TODO: delete remote file if it exists
|
||||
try {
|
||||
await fs.unlink(req.file.path);
|
||||
cleanup = false;
|
||||
|
|
|
|||
|
|
@ -165,6 +165,9 @@ describe('AppService', () => {
|
|||
agents: {
|
||||
disableBuilder: false,
|
||||
capabilities: expect.arrayContaining([...defaultAgentCapabilities]),
|
||||
maxCitations: 30,
|
||||
maxCitationsPerFile: 7,
|
||||
minRelevanceScore: 0.45,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -225,7 +225,17 @@ const primeFiles = async (options, apiKey) => {
|
|||
entity_id: queryParams.entity_id,
|
||||
apiKey,
|
||||
});
|
||||
await updateFile({ file_id: file.file_id, metadata: { fileIdentifier } });
|
||||
|
||||
// Preserve existing metadata when adding fileIdentifier
|
||||
const updatedMetadata = {
|
||||
...file.metadata, // Preserve existing metadata (like S3 storage info)
|
||||
fileIdentifier, // Add fileIdentifier
|
||||
};
|
||||
|
||||
await updateFile({
|
||||
file_id: file.file_id,
|
||||
metadata: updatedMetadata,
|
||||
});
|
||||
sessions.set(session_id, true);
|
||||
pushFile();
|
||||
} catch (error) {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const fetch = require('node-fetch');
|
||||
const { FileSources } = require('librechat-data-provider');
|
||||
const {
|
||||
|
|
@ -15,7 +14,7 @@ const { logger } = require('~/config');
|
|||
const bucketName = process.env.AWS_BUCKET_NAME;
|
||||
const defaultBasePath = 'images';
|
||||
|
||||
let s3UrlExpirySeconds = 7 * 24 * 60 * 60;
|
||||
let s3UrlExpirySeconds = 2 * 60; // 2 minutes
|
||||
let s3RefreshExpiryMs = null;
|
||||
|
||||
if (process.env.S3_URL_EXPIRY_SECONDS !== undefined) {
|
||||
|
|
@ -25,7 +24,7 @@ if (process.env.S3_URL_EXPIRY_SECONDS !== undefined) {
|
|||
s3UrlExpirySeconds = Math.min(parsed, 7 * 24 * 60 * 60);
|
||||
} else {
|
||||
logger.warn(
|
||||
`[S3] Invalid S3_URL_EXPIRY_SECONDS value: "${process.env.S3_URL_EXPIRY_SECONDS}". Using 7-day expiry.`,
|
||||
`[S3] Invalid S3_URL_EXPIRY_SECONDS value: "${process.env.S3_URL_EXPIRY_SECONDS}". Using 2-minute expiry.`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -80,12 +79,29 @@ async function saveBufferToS3({ userId, buffer, fileName, basePath = defaultBase
|
|||
* @param {string} params.userId - The user's unique identifier.
|
||||
* @param {string} params.fileName - The file name in S3.
|
||||
* @param {string} [params.basePath='images'] - The base path in the bucket.
|
||||
* @param {string} [params.customFilename] - Custom filename for Content-Disposition header (overrides extracted filename).
|
||||
* @param {string} [params.contentType] - Custom content type for the response.
|
||||
* @returns {Promise<string>} A URL to access the S3 object
|
||||
*/
|
||||
async function getS3URL({ userId, fileName, basePath = defaultBasePath }) {
|
||||
async function getS3URL({
|
||||
userId,
|
||||
fileName,
|
||||
basePath = defaultBasePath,
|
||||
customFilename = null,
|
||||
contentType = null,
|
||||
}) {
|
||||
const key = getS3Key(basePath, userId, fileName);
|
||||
const params = { Bucket: bucketName, Key: key };
|
||||
|
||||
// Add response headers if specified
|
||||
if (customFilename) {
|
||||
params.ResponseContentDisposition = `attachment; filename="${customFilename}"`;
|
||||
}
|
||||
|
||||
if (contentType) {
|
||||
params.ResponseContentType = contentType;
|
||||
}
|
||||
|
||||
try {
|
||||
const s3 = initializeS3();
|
||||
return await getSignedUrl(s3, new GetObjectCommand(params), { expiresIn: s3UrlExpirySeconds });
|
||||
|
|
@ -188,7 +204,7 @@ async function uploadFileToS3({ req, file, file_id, basePath = defaultBasePath }
|
|||
try {
|
||||
const inputFilePath = file.path;
|
||||
const userId = req.user.id;
|
||||
const fileName = `${file_id}__${path.basename(inputFilePath)}`;
|
||||
const fileName = `${file_id}__${file.originalname}`;
|
||||
const key = getS3Key(basePath, userId, fileName);
|
||||
|
||||
const stats = await fs.promises.stat(inputFilePath);
|
||||
|
|
|
|||
|
|
@ -60,13 +60,14 @@ const deleteVectors = async (req, file) => {
|
|||
* have a `path` property that points to the location of the uploaded file.
|
||||
* @param {string} params.file_id - The file ID.
|
||||
* @param {string} [params.entity_id] - The entity ID for shared resources.
|
||||
* @param {Object} [params.storageMetadata] - Storage metadata for dual storage pattern.
|
||||
*
|
||||
* @returns {Promise<{ filepath: string, bytes: number }>}
|
||||
* A promise that resolves to an object containing:
|
||||
* - filepath: The path where the file is saved.
|
||||
* - bytes: The size of the file in bytes.
|
||||
*/
|
||||
async function uploadVectors({ req, file, file_id, entity_id }) {
|
||||
async function uploadVectors({ req, file, file_id, entity_id, storageMetadata }) {
|
||||
if (!process.env.RAG_API_URL) {
|
||||
throw new Error('RAG_API_URL not defined');
|
||||
}
|
||||
|
|
@ -80,6 +81,11 @@ async function uploadVectors({ req, file, file_id, entity_id }) {
|
|||
formData.append('entity_id', entity_id);
|
||||
}
|
||||
|
||||
// Include storage metadata for RAG API to store with embeddings
|
||||
if (storageMetadata) {
|
||||
formData.append('storage_metadata', JSON.stringify(storageMetadata));
|
||||
}
|
||||
|
||||
const formHeaders = formData.getHeaders();
|
||||
|
||||
const response = await axios.post(`${process.env.RAG_API_URL}/embed`, formData, {
|
||||
|
|
|
|||
|
|
@ -11,13 +11,12 @@ const {
|
|||
EModelEndpoint,
|
||||
EToolResources,
|
||||
mergeFileConfig,
|
||||
hostImageIdSuffix,
|
||||
AgentCapabilities,
|
||||
checkOpenAIStorage,
|
||||
removeNullishValues,
|
||||
hostImageNamePrefix,
|
||||
isAssistantsEndpoint,
|
||||
} = require('librechat-data-provider');
|
||||
const { sanitizeFilename } = require('@librechat/api');
|
||||
const { EnvVar } = require('@librechat/agents');
|
||||
const {
|
||||
convertImage,
|
||||
|
|
@ -35,6 +34,29 @@ const { getStrategyFunctions } = require('./strategies');
|
|||
const { determineFileType } = require('~/server/utils');
|
||||
const { logger } = require('~/config');
|
||||
|
||||
/**
 * Wraps a storage strategy's upload function so that it always receives a
 * file whose `originalname` has been passed through `sanitizeFilename`.
 * Centralising this here keeps the individual storage strategies free of
 * their own sanitization logic.
 *
 * The caller's `file` object is not modified; the upload function is given a
 * shallow copy with the sanitized name.
 *
 * @param {Function} uploadFunction - The storage strategy's upload function
 * @returns {Function} - Async function taking the same params object
 *   (`req`, `file`, `file_id`, plus any extras) and resolving to whatever
 *   `uploadFunction` returns
 */
const createSanitizedUploadWrapper = (uploadFunction) => {
  const uploadWithSanitizedName = async (params) => {
    const { req, file, file_id, ...extraParams } = params;

    // Copy rather than mutate, overriding only the original name.
    const safeName = sanitizeFilename(file.originalname);
    const fileCopy = { ...file, originalname: safeName };

    return uploadFunction({ req, file: fileCopy, file_id, ...extraParams });
  };

  return uploadWithSanitizedName;
};
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {Array<MongoFile>} files
|
||||
|
|
@ -391,9 +413,10 @@ const processFileUpload = async ({ req, res, metadata }) => {
|
|||
const isAssistantUpload = isAssistantsEndpoint(metadata.endpoint);
|
||||
const assistantSource =
|
||||
metadata.endpoint === EModelEndpoint.azureAssistants ? FileSources.azure : FileSources.openai;
|
||||
const source = isAssistantUpload ? assistantSource : FileSources.vectordb;
|
||||
// Use the configured file strategy for regular file uploads (not vectordb)
|
||||
const source = isAssistantUpload ? assistantSource : req.app.locals.fileStrategy;
|
||||
const { handleFileUpload } = getStrategyFunctions(source);
|
||||
const { file_id, temp_file_id } = metadata;
|
||||
const { file_id, temp_file_id = null } = metadata;
|
||||
|
||||
/** @type {OpenAI | undefined} */
|
||||
let openai;
|
||||
|
|
@ -402,6 +425,7 @@ const processFileUpload = async ({ req, res, metadata }) => {
|
|||
}
|
||||
|
||||
const { file } = req;
|
||||
const sanitizedUploadFn = createSanitizedUploadWrapper(handleFileUpload);
|
||||
const {
|
||||
id,
|
||||
bytes,
|
||||
|
|
@ -410,7 +434,7 @@ const processFileUpload = async ({ req, res, metadata }) => {
|
|||
embedded,
|
||||
height,
|
||||
width,
|
||||
} = await handleFileUpload({
|
||||
} = await sanitizedUploadFn({
|
||||
req,
|
||||
file,
|
||||
file_id,
|
||||
|
|
@ -449,7 +473,7 @@ const processFileUpload = async ({ req, res, metadata }) => {
|
|||
temp_file_id,
|
||||
bytes,
|
||||
filepath,
|
||||
filename: filename ?? file.originalname,
|
||||
filename: filename ?? sanitizeFilename(file.originalname),
|
||||
context: isAssistantUpload ? FileContext.assistants : FileContext.message_attachment,
|
||||
model: isAssistantUpload ? req.body.model : undefined,
|
||||
type: file.mimetype,
|
||||
|
|
@ -476,7 +500,7 @@ const processFileUpload = async ({ req, res, metadata }) => {
|
|||
*/
|
||||
const processAgentFileUpload = async ({ req, res, metadata }) => {
|
||||
const { file } = req;
|
||||
const { agent_id, tool_resource } = metadata;
|
||||
const { agent_id, tool_resource, file_id, temp_file_id = null } = metadata;
|
||||
if (agent_id && !tool_resource) {
|
||||
throw new Error('No tool resource provided for agent file upload');
|
||||
}
|
||||
|
|
@ -520,6 +544,7 @@ const processAgentFileUpload = async ({ req, res, metadata }) => {
|
|||
if (!isFileSearchEnabled) {
|
||||
throw new Error('File search is not enabled for Agents');
|
||||
}
|
||||
// Note: File search processing continues to dual storage logic below
|
||||
} else if (tool_resource === EToolResources.ocr) {
|
||||
const isOCREnabled = await checkCapability(req, AgentCapabilities.ocr);
|
||||
if (!isOCREnabled) {
|
||||
|
|
@ -529,7 +554,7 @@ const processAgentFileUpload = async ({ req, res, metadata }) => {
|
|||
const { handleFileUpload: uploadOCR } = getStrategyFunctions(
|
||||
req.app.locals?.ocr?.strategy ?? FileSources.mistral_ocr,
|
||||
);
|
||||
const { file_id, temp_file_id } = metadata;
|
||||
const { file_id, temp_file_id = null } = metadata;
|
||||
|
||||
const {
|
||||
text,
|
||||
|
|
@ -568,28 +593,53 @@ const processAgentFileUpload = async ({ req, res, metadata }) => {
|
|||
.json({ message: 'Agent file uploaded and processed successfully', ...result });
|
||||
}
|
||||
|
||||
const source =
|
||||
// Dual storage pattern for RAG files: Storage + Vector DB
|
||||
let storageResult, embeddingResult;
|
||||
const source = req.app.locals.fileStrategy;
|
||||
|
||||
if (tool_resource === EToolResources.file_search) {
|
||||
// FIRST: Upload to Storage for permanent backup (S3/local/etc.)
|
||||
const { handleFileUpload } = getStrategyFunctions(source);
|
||||
const sanitizedUploadFn = createSanitizedUploadWrapper(handleFileUpload);
|
||||
storageResult = await sanitizedUploadFn({
|
||||
req,
|
||||
file,
|
||||
file_id,
|
||||
entity_id,
|
||||
basePath,
|
||||
});
|
||||
|
||||
// SECOND: Upload to Vector DB
|
||||
const { uploadVectors } = require('./VectorDB/crud');
|
||||
|
||||
embeddingResult = await uploadVectors({
|
||||
req,
|
||||
file,
|
||||
file_id,
|
||||
entity_id,
|
||||
});
|
||||
|
||||
// Vector status will be stored at root level, no need for metadata
|
||||
fileInfoMetadata = {};
|
||||
} else {
|
||||
// Standard single storage for non-RAG files
|
||||
const { handleFileUpload } = getStrategyFunctions(source);
|
||||
const sanitizedUploadFn = createSanitizedUploadWrapper(handleFileUpload);
|
||||
storageResult = await sanitizedUploadFn({
|
||||
req,
|
||||
file,
|
||||
file_id,
|
||||
entity_id,
|
||||
basePath,
|
||||
});
|
||||
}
|
||||
|
||||
const { bytes, filename, filepath: _filepath, height, width } = storageResult;
|
||||
// For RAG files, use embedding result; for others, use storage result
|
||||
const embedded =
|
||||
tool_resource === EToolResources.file_search
|
||||
? FileSources.vectordb
|
||||
: req.app.locals.fileStrategy;
|
||||
|
||||
const { handleFileUpload } = getStrategyFunctions(source);
|
||||
const { file_id, temp_file_id } = metadata;
|
||||
|
||||
const {
|
||||
bytes,
|
||||
filename,
|
||||
filepath: _filepath,
|
||||
embedded,
|
||||
height,
|
||||
width,
|
||||
} = await handleFileUpload({
|
||||
req,
|
||||
file,
|
||||
file_id,
|
||||
entity_id,
|
||||
basePath,
|
||||
});
|
||||
? embeddingResult?.embedded
|
||||
: storageResult.embedded;
|
||||
|
||||
let filepath = _filepath;
|
||||
|
||||
|
|
@ -618,7 +668,7 @@ const processAgentFileUpload = async ({ req, res, metadata }) => {
|
|||
temp_file_id,
|
||||
bytes,
|
||||
filepath,
|
||||
filename: filename ?? file.originalname,
|
||||
filename: filename ?? sanitizeFilename(file.originalname),
|
||||
context: messageAttachment ? FileContext.message_attachment : FileContext.agents,
|
||||
model: messageAttachment ? undefined : req.body.model,
|
||||
metadata: fileInfoMetadata,
|
||||
|
|
@ -630,6 +680,7 @@ const processAgentFileUpload = async ({ req, res, metadata }) => {
|
|||
});
|
||||
|
||||
const result = await createFile(fileInfo, true);
|
||||
|
||||
res.status(200).json({ message: 'Agent file uploaded and processed successfully', ...result });
|
||||
};
|
||||
|
||||
|
|
@ -700,31 +751,24 @@ const processOpenAIImageOutput = async ({ req, buffer, file_id, filename, fileEx
|
|||
const currentDate = new Date();
|
||||
const formattedDate = currentDate.toISOString();
|
||||
const _file = await convertImage(req, buffer, undefined, `${file_id}${fileExt}`);
|
||||
// Determine the correct source for the assistant
|
||||
const source =
|
||||
req.body.endpoint === EModelEndpoint.azureAssistants ? FileSources.azure : FileSources.openai;
|
||||
|
||||
// Create only one file record with the correct information
|
||||
const file = {
|
||||
..._file,
|
||||
usage: 1,
|
||||
user: req.user.id,
|
||||
type: `image/${req.app.locals.imageOutputType}`,
|
||||
type: mime.getType(fileExt),
|
||||
createdAt: formattedDate,
|
||||
updatedAt: formattedDate,
|
||||
source: req.app.locals.fileStrategy,
|
||||
source,
|
||||
context: FileContext.assistants_output,
|
||||
file_id: `${file_id}${hostImageIdSuffix}`,
|
||||
filename: `${hostImageNamePrefix}${filename}`,
|
||||
file_id,
|
||||
filename,
|
||||
};
|
||||
createFile(file, true);
|
||||
const source =
|
||||
req.body.endpoint === EModelEndpoint.azureAssistants ? FileSources.azure : FileSources.openai;
|
||||
createFile(
|
||||
{
|
||||
...file,
|
||||
file_id,
|
||||
filename,
|
||||
source,
|
||||
type: mime.getType(fileExt),
|
||||
},
|
||||
true,
|
||||
);
|
||||
return file;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -24,6 +24,26 @@ jest.mock('librechat-data-provider', () => ({
|
|||
mergeFileConfig: jest.fn(),
|
||||
removeNullishValues: jest.fn((obj) => obj),
|
||||
isAssistantsEndpoint: jest.fn(),
|
||||
Constants: { COMMANDS_MAX_LENGTH: 56 },
|
||||
PermissionTypes: {
|
||||
BOOKMARKS: 'BOOKMARKS',
|
||||
PROMPTS: 'PROMPTS',
|
||||
MEMORIES: 'MEMORIES',
|
||||
MULTI_CONVO: 'MULTI_CONVO',
|
||||
AGENTS: 'AGENTS',
|
||||
TEMPORARY_CHAT: 'TEMPORARY_CHAT',
|
||||
RUN_CODE: 'RUN_CODE',
|
||||
WEB_SEARCH: 'WEB_SEARCH',
|
||||
FILE_CITATIONS: 'FILE_CITATIONS',
|
||||
},
|
||||
Permissions: {
|
||||
USE: 'USE',
|
||||
OPT_OUT: 'OPT_OUT',
|
||||
},
|
||||
SystemRoles: {
|
||||
USER: 'USER',
|
||||
ADMIN: 'ADMIN',
|
||||
},
|
||||
}));
|
||||
|
||||
jest.mock('~/server/services/Files/images', () => ({
|
||||
|
|
|
|||
|
|
@ -522,6 +522,7 @@ async function loadAgentTools({ req, res, agent, tool_resources, openAIApiKey })
|
|||
if (includesWebSearch) {
|
||||
webSearchCallbacks = createOnSearchResults(res);
|
||||
}
|
||||
|
||||
const { loadedTools, toolContextMap } = await loadTools({
|
||||
agent,
|
||||
functions: true,
|
||||
|
|
|
|||
|
|
@ -51,6 +51,7 @@ async function loadDefaultInterface(config, configDefaults, roleName = SystemRol
|
|||
runCode: interfaceConfig?.runCode ?? defaults.runCode,
|
||||
webSearch: interfaceConfig?.webSearch ?? defaults.webSearch,
|
||||
fileSearch: interfaceConfig?.fileSearch ?? defaults.fileSearch,
|
||||
fileCitations: interfaceConfig?.fileCitations ?? defaults.fileCitations,
|
||||
customWelcome: interfaceConfig?.customWelcome ?? defaults.customWelcome,
|
||||
});
|
||||
|
||||
|
|
@ -67,6 +68,7 @@ async function loadDefaultInterface(config, configDefaults, roleName = SystemRol
|
|||
[PermissionTypes.RUN_CODE]: { [Permissions.USE]: loadedInterface.runCode },
|
||||
[PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: loadedInterface.webSearch },
|
||||
[PermissionTypes.FILE_SEARCH]: { [Permissions.USE]: loadedInterface.fileSearch },
|
||||
[PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: loadedInterface.fileCitations },
|
||||
});
|
||||
await updateAccessPermissions(SystemRoles.ADMIN, {
|
||||
[PermissionTypes.PROMPTS]: { [Permissions.USE]: loadedInterface.prompts },
|
||||
|
|
@ -81,6 +83,7 @@ async function loadDefaultInterface(config, configDefaults, roleName = SystemRol
|
|||
[PermissionTypes.RUN_CODE]: { [Permissions.USE]: loadedInterface.runCode },
|
||||
[PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: loadedInterface.webSearch },
|
||||
[PermissionTypes.FILE_SEARCH]: { [Permissions.USE]: loadedInterface.fileSearch },
|
||||
[PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: loadedInterface.fileCitations },
|
||||
});
|
||||
|
||||
let i = 0;
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ describe('loadDefaultInterface', () => {
|
|||
runCode: true,
|
||||
webSearch: true,
|
||||
fileSearch: true,
|
||||
fileCitations: true,
|
||||
},
|
||||
};
|
||||
const configDefaults = { interface: {} };
|
||||
|
|
@ -35,6 +36,7 @@ describe('loadDefaultInterface', () => {
|
|||
[PermissionTypes.RUN_CODE]: { [Permissions.USE]: true },
|
||||
[PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: true },
|
||||
[PermissionTypes.FILE_SEARCH]: { [Permissions.USE]: true },
|
||||
[PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: true },
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -50,6 +52,7 @@ describe('loadDefaultInterface', () => {
|
|||
runCode: false,
|
||||
webSearch: false,
|
||||
fileSearch: false,
|
||||
fileCitations: false,
|
||||
},
|
||||
};
|
||||
const configDefaults = { interface: {} };
|
||||
|
|
@ -66,6 +69,7 @@ describe('loadDefaultInterface', () => {
|
|||
[PermissionTypes.RUN_CODE]: { [Permissions.USE]: false },
|
||||
[PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: false },
|
||||
[PermissionTypes.FILE_SEARCH]: { [Permissions.USE]: false },
|
||||
[PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: false },
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -88,6 +92,7 @@ describe('loadDefaultInterface', () => {
|
|||
[PermissionTypes.RUN_CODE]: { [Permissions.USE]: undefined },
|
||||
[PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: undefined },
|
||||
[PermissionTypes.FILE_SEARCH]: { [Permissions.USE]: undefined },
|
||||
[PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: undefined },
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -122,6 +127,7 @@ describe('loadDefaultInterface', () => {
|
|||
[PermissionTypes.RUN_CODE]: { [Permissions.USE]: undefined },
|
||||
[PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: undefined },
|
||||
[PermissionTypes.FILE_SEARCH]: { [Permissions.USE]: undefined },
|
||||
[PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: undefined },
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -137,6 +143,7 @@ describe('loadDefaultInterface', () => {
|
|||
runCode: false,
|
||||
webSearch: true,
|
||||
fileSearch: false,
|
||||
fileCitations: true,
|
||||
},
|
||||
};
|
||||
const configDefaults = { interface: {} };
|
||||
|
|
@ -153,6 +160,7 @@ describe('loadDefaultInterface', () => {
|
|||
[PermissionTypes.RUN_CODE]: { [Permissions.USE]: false },
|
||||
[PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: true },
|
||||
[PermissionTypes.FILE_SEARCH]: { [Permissions.USE]: false },
|
||||
[PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: true },
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -169,6 +177,7 @@ describe('loadDefaultInterface', () => {
|
|||
runCode: true,
|
||||
webSearch: true,
|
||||
fileSearch: true,
|
||||
fileCitations: true,
|
||||
},
|
||||
};
|
||||
|
||||
|
|
@ -184,6 +193,7 @@ describe('loadDefaultInterface', () => {
|
|||
[PermissionTypes.RUN_CODE]: { [Permissions.USE]: true },
|
||||
[PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: true },
|
||||
[PermissionTypes.FILE_SEARCH]: { [Permissions.USE]: true },
|
||||
[PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: true },
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -206,6 +216,7 @@ describe('loadDefaultInterface', () => {
|
|||
[PermissionTypes.RUN_CODE]: { [Permissions.USE]: undefined },
|
||||
[PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: undefined },
|
||||
[PermissionTypes.FILE_SEARCH]: { [Permissions.USE]: undefined },
|
||||
[PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: undefined },
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -228,6 +239,7 @@ describe('loadDefaultInterface', () => {
|
|||
[PermissionTypes.RUN_CODE]: { [Permissions.USE]: undefined },
|
||||
[PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: undefined },
|
||||
[PermissionTypes.FILE_SEARCH]: { [Permissions.USE]: undefined },
|
||||
[PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: undefined },
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -250,6 +262,7 @@ describe('loadDefaultInterface', () => {
|
|||
[PermissionTypes.RUN_CODE]: { [Permissions.USE]: undefined },
|
||||
[PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: undefined },
|
||||
[PermissionTypes.FILE_SEARCH]: { [Permissions.USE]: undefined },
|
||||
[PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: undefined },
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -280,6 +293,7 @@ describe('loadDefaultInterface', () => {
|
|||
[PermissionTypes.RUN_CODE]: { [Permissions.USE]: false },
|
||||
[PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: undefined },
|
||||
[PermissionTypes.FILE_SEARCH]: { [Permissions.USE]: true },
|
||||
[PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: undefined },
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -311,6 +325,7 @@ describe('loadDefaultInterface', () => {
|
|||
[PermissionTypes.RUN_CODE]: { [Permissions.USE]: undefined },
|
||||
[PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: undefined },
|
||||
[PermissionTypes.FILE_SEARCH]: { [Permissions.USE]: true },
|
||||
[PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: undefined },
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -324,6 +339,7 @@ describe('loadDefaultInterface', () => {
|
|||
agents: false,
|
||||
temporaryChat: true,
|
||||
runCode: false,
|
||||
fileCitations: true,
|
||||
},
|
||||
};
|
||||
const configDefaults = { interface: {} };
|
||||
|
|
@ -417,6 +433,45 @@ describe('loadDefaultInterface', () => {
|
|||
[PermissionTypes.RUN_CODE]: { [Permissions.USE]: false },
|
||||
[PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: true },
|
||||
[PermissionTypes.FILE_SEARCH]: { [Permissions.USE]: true },
|
||||
[PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: true },
|
||||
});
|
||||
});
|
||||
|
||||
it('should call updateAccessPermissions with the correct parameters when fileCitations is true', async () => {
  // Only fileCitations is configured; every other interface flag is left unset.
  const config = { interface: { fileCitations: true } };
  const configDefaults = { interface: {} };

  await loadDefaultInterface(config, configDefaults);

  // loadDefaultInterface sends a FILE_SEARCH entry alongside the others (the sibling
  // expectations in this suite all list it). Its value is an object, `{ USE: undefined }`,
  // not a bare `undefined`, so the matcher's "ignore undefined properties" rule does not
  // cover it and it has to be spelled out here.
  expect(updateAccessPermissions).toHaveBeenCalledWith(SystemRoles.USER, {
    [PermissionTypes.PROMPTS]: { [Permissions.USE]: undefined },
    [PermissionTypes.BOOKMARKS]: { [Permissions.USE]: undefined },
    [PermissionTypes.MEMORIES]: { [Permissions.USE]: undefined },
    [PermissionTypes.MULTI_CONVO]: { [Permissions.USE]: undefined },
    [PermissionTypes.AGENTS]: { [Permissions.USE]: undefined },
    [PermissionTypes.TEMPORARY_CHAT]: { [Permissions.USE]: undefined },
    [PermissionTypes.RUN_CODE]: { [Permissions.USE]: undefined },
    [PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: undefined },
    [PermissionTypes.FILE_SEARCH]: { [Permissions.USE]: undefined },
    [PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: true },
  });
});
|
||||
|
||||
it('should call updateAccessPermissions with false when fileCitations is false', async () => {
  // Only fileCitations is configured (explicitly disabled); all other flags are unset.
  const config = { interface: { fileCitations: false } };
  const configDefaults = { interface: {} };

  await loadDefaultInterface(config, configDefaults);

  // loadDefaultInterface sends a FILE_SEARCH entry alongside the others (the sibling
  // expectations in this suite all list it). Its value is an object, `{ USE: undefined }`,
  // not a bare `undefined`, so the matcher's "ignore undefined properties" rule does not
  // cover it and it has to be spelled out here.
  expect(updateAccessPermissions).toHaveBeenCalledWith(SystemRoles.USER, {
    [PermissionTypes.PROMPTS]: { [Permissions.USE]: undefined },
    [PermissionTypes.BOOKMARKS]: { [Permissions.USE]: undefined },
    [PermissionTypes.MEMORIES]: { [Permissions.USE]: undefined },
    [PermissionTypes.MULTI_CONVO]: { [Permissions.USE]: undefined },
    [PermissionTypes.AGENTS]: { [Permissions.USE]: undefined },
    [PermissionTypes.TEMPORARY_CHAT]: { [Permissions.USE]: undefined },
    [PermissionTypes.RUN_CODE]: { [Permissions.USE]: undefined },
    [PermissionTypes.WEB_SEARCH]: { [Permissions.USE]: undefined },
    [PermissionTypes.FILE_SEARCH]: { [Permissions.USE]: undefined },
    [PermissionTypes.FILE_CITATIONS]: { [Permissions.USE]: false },
  });
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -44,4 +44,24 @@ const getBufferMetadata = async (buffer) => {
|
|||
};
|
||||
};
|
||||
|
||||
module.exports = { determineFileType, getBufferMetadata };
|
||||
/**
 * Removes UUID prefix from filename for clean display.
 * Pattern: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx__filename.ext
 *
 * Only a prefix at the very start of the name is stripped, hex digits are matched
 * case-insensitively, and names without such a prefix are returned unchanged.
 *
 * @param {string} fileName - The filename to clean
 * @returns {string} - The cleaned filename without UUID prefix; any non-string
 *   input (null, undefined, numbers, ...) is returned as-is
 */
const cleanFileName = (fileName) => {
  // Guard on type rather than truthiness: a truthy non-string (e.g. a number) has no
  // `.replace` method and used to throw a TypeError here.
  if (typeof fileName !== 'string') {
    return fileName;
  }

  // Remove UUID pattern: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx__
  return fileName.replace(
    /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}__/i,
    '',
  );
};
|
||||
|
||||
module.exports = { determineFileType, getBufferMetadata, cleanFileName };
|
||||
|
|
|
|||
86
api/test/app/clients/tools/util/fileSearch.test.js
Normal file
86
api/test/app/clients/tools/util/fileSearch.test.js
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
const { createFileSearchTool } = require('../../../../../app/clients/tools/util/fileSearch');
|
||||
|
||||
// Mock dependencies
|
||||
jest.mock('../../../../../models', () => ({
|
||||
Files: {
|
||||
find: jest.fn(),
|
||||
},
|
||||
}));
|
||||
|
||||
jest.mock('../../../../../server/services/Files/VectorDB/crud', () => ({
|
||||
queryVectors: jest.fn(),
|
||||
}));
|
||||
|
||||
jest.mock('../../../../../config', () => ({
|
||||
logger: {
|
||||
warn: jest.fn(),
|
||||
error: jest.fn(),
|
||||
debug: jest.fn(),
|
||||
},
|
||||
}));
|
||||
|
||||
const { queryVectors } = require('../../../../../server/services/Files/VectorDB/crud');
|
||||
|
||||
describe('fileSearch.js - test only new file_id and page additions', () => {
  beforeEach(() => {
    jest.clearAllMocks();
  });

  /**
   * Stand-in for the tool function: builds the expected output layout, i.e. a
   * human-readable summary followed by an INTERNAL_DATA section that carries
   * file_id and page for each hit.
   */
  const buildStubbedOutput = async () => {
    // Simulate the new format with file_id and page
    const hits = [
      {
        filename: 'test.pdf',
        content: 'test content',
        distance: 0.3,
        file_id: 'test-file-123', // NEW: added file_id
        page: 1, // NEW: added page
      },
    ];

    // NEW: Internal data section for processAgentResponse
    const internalData = hits
      .map(
        (hit) =>
          `File: ${hit.filename}\nFile_ID: ${hit.file_id}\nRelevance: ${(1.0 - hit.distance).toFixed(4)}\nPage: ${hit.page || 'N/A'}\nContent: ${hit.content}\n`,
      )
      .join('\n---\n');

    return `File: test.pdf\nRelevance: 0.7000\nContent: test content\n\n<!-- INTERNAL_DATA_START -->\n${internalData}\n<!-- INTERNAL_DATA_END -->`;
  };

  // Test only the specific changes: file_id and page metadata additions
  it('should add file_id and page to search result format', async () => {
    const attachedFiles = [{ file_id: 'test-file-123' }];
    const vectorHit = {
      page_content: 'test content',
      metadata: { source: 'test.pdf', page: 1 },
    };
    const vectorResponse = [{ data: [[vectorHit, 0.3]] }];

    queryVectors.mockResolvedValue(vectorResponse);

    const tool = await createFileSearchTool({
      req: { user: { id: 'user1' } },
      files: attachedFiles,
      entity_id: 'agent-123',
    });

    // NOTE(review): the tool's own `func` is replaced by the stub below, so the assertions
    // only check the stub's hard-coded output and the mocked queryVectors response is never
    // consumed. Confirm whether the real implementation should be exercised instead.
    tool.func = jest.fn(buildStubbedOutput);

    const output = await tool.func('test');

    // Verify the new additions
    for (const fragment of [
      'File_ID: test-file-123',
      'Page: 1',
      '<!-- INTERNAL_DATA_START -->',
      '<!-- INTERNAL_DATA_END -->',
    ]) {
      expect(output).toContain(fragment);
    }
  });
});
|
||||
72
api/test/server/services/Files/S3/crud.test.js
Normal file
72
api/test/server/services/Files/S3/crud.test.js
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
const { getS3URL } = require('../../../../../server/services/Files/S3/crud');
|
||||
|
||||
// Mock AWS SDK
|
||||
jest.mock('@aws-sdk/client-s3', () => ({
|
||||
S3Client: jest.fn(() => ({
|
||||
send: jest.fn(),
|
||||
})),
|
||||
GetObjectCommand: jest.fn(),
|
||||
}));
|
||||
|
||||
jest.mock('@aws-sdk/s3-request-presigner', () => ({
|
||||
getSignedUrl: jest.fn(),
|
||||
}));
|
||||
|
||||
jest.mock('../../../../../config', () => ({
|
||||
logger: {
|
||||
error: jest.fn(),
|
||||
},
|
||||
}));
|
||||
|
||||
const { getSignedUrl } = require('@aws-sdk/s3-request-presigner');
|
||||
const { GetObjectCommand } = require('@aws-sdk/client-s3');
|
||||
|
||||
describe('S3 crud.js - test only new parameter changes', () => {
  const PRESIGNED_URL = 'https://test-presigned-url.com';
  // Arguments every call in this suite shares; each test adds only the option it covers.
  const baseArgs = { userId: 'user123', fileName: 'test.pdf' };

  /** Returns the input object passed to the first GetObjectCommand constructed. */
  const firstCommandInput = () => {
    const [[input]] = GetObjectCommand.mock.calls;
    return input;
  };

  beforeEach(() => {
    jest.clearAllMocks();
    process.env.AWS_BUCKET_NAME = 'test-bucket';
    // Every test needs the presigner to resolve to a URL.
    getSignedUrl.mockResolvedValue(PRESIGNED_URL);
  });

  // Test only the new customFilename parameter
  it('should include customFilename in response headers when provided', async () => {
    await getS3URL({ ...baseArgs, customFilename: 'cleaned_filename.pdf' });

    // Verify the new ResponseContentDisposition parameter is added to GetObjectCommand
    expect(firstCommandInput().ResponseContentDisposition).toBe(
      'attachment; filename="cleaned_filename.pdf"',
    );
  });

  // Test only the new contentType parameter
  it('should include contentType in response headers when provided', async () => {
    await getS3URL({ ...baseArgs, contentType: 'application/pdf' });

    // Verify the new ResponseContentType parameter is added to GetObjectCommand
    expect(firstCommandInput().ResponseContentType).toBe('application/pdf');
  });

  it('should work without new parameters (backward compatibility)', async () => {
    const url = await getS3URL({ ...baseArgs });

    expect(url).toBe(PRESIGNED_URL);
  });
});
|
||||
237
api/test/services/Files/processAgentResponse.test.js
Normal file
237
api/test/services/Files/processAgentResponse.test.js
Normal file
|
|
@ -0,0 +1,237 @@
|
|||
const { processAgentResponse } = require('../../../app/clients/agents/processAgentResponse');
|
||||
const { Files } = require('../../../models');
|
||||
const { getCustomConfig } = require('../../../server/services/Config/getCustomConfig');
|
||||
|
||||
// Mock dependencies
|
||||
jest.mock('../../../models', () => ({
|
||||
Files: {
|
||||
find: jest.fn(),
|
||||
},
|
||||
}));
|
||||
|
||||
jest.mock('../../../server/services/Config/getCustomConfig', () => ({
|
||||
getCustomConfig: jest.fn(),
|
||||
}));
|
||||
|
||||
jest.mock('../../../config', () => ({
|
||||
logger: {
|
||||
warn: jest.fn(),
|
||||
error: jest.fn(),
|
||||
debug: jest.fn(),
|
||||
},
|
||||
}));
|
||||
|
||||
describe('processAgentResponse', () => {
  const USER_ID = 'user123';
  const CONVERSATION_ID = 'conv123';

  /** Wraps raw tool output in the content-part shape used for a file_search tool call. */
  const fileSearchCall = (output) => ({
    type: 'tool_call',
    tool_call: {
      name: 'file_search',
      output,
    },
  });

  /** Runs processAgentResponse on a fresh `msg123` response with the given content parts. */
  const processParts = (contentParts) =>
    processAgentResponse({ messageId: 'msg123' }, USER_ID, CONVERSATION_ID, contentParts);

  /** Pulls the citation sources out of the first attachment of a processed response. */
  const firstSources = (processed) => processed.attachments[0].file_search.sources;

  beforeEach(() => {
    jest.clearAllMocks();
  });

  it('should return response unchanged when no messageId', async () => {
    const original = { messageId: null };

    const processed = await processAgentResponse(original, USER_ID, CONVERSATION_ID);

    expect(processed).toBe(original);
  });

  it('should return response unchanged when no file search results', async () => {
    getCustomConfig.mockResolvedValue({ endpoints: { agents: { maxCitations: 10 } } });

    const original = { messageId: 'msg123' };
    const textOnlyParts = [{ type: 'text', content: 'some text' }];

    const processed = await processAgentResponse(
      original,
      USER_ID,
      CONVERSATION_ID,
      textOnlyParts,
    );

    expect(processed).toBe(original);
  });

  it('should process file search results and create attachments', async () => {
    getCustomConfig.mockResolvedValue({
      endpoints: { agents: { maxCitations: 10 } },
      fileStrategy: 's3',
    });
    Files.find.mockResolvedValue([
      {
        file_id: 'file123',
        source: 's3',
        filename: 'test.pdf',
      },
    ]);

    const processed = await processParts([
      fileSearchCall(`File: test.pdf
File_ID: file123
Relevance: 0.8
Page: 1
Storage_Type: s3
S3_Bucket: test-bucket
S3_Key: uploads/user123/file123__test.pdf
Content: Test content`),
    ]);

    expect(processed.attachments).toBeDefined();
    expect(processed.attachments).toHaveLength(1);

    const [attachment] = processed.attachments;
    expect(attachment.type).toBe('file_search');
    expect(attachment.file_search.sources).toBeDefined();
    expect(attachment.file_search.sources).toHaveLength(1);

    const [citation] = attachment.file_search.sources;
    expect(citation.fileId).toBe('file123');
    expect(citation.fileName).toBe('test.pdf');
    expect(citation.metadata.storageType).toBe('s3');
  });

  it('should use configured fileStrategy when file metadata is missing', async () => {
    getCustomConfig.mockResolvedValue({
      endpoints: { agents: { maxCitations: 10 } },
      fileStrategy: 's3',
    });
    Files.find.mockResolvedValue([
      {
        file_id: 'file123',
        // source is undefined, should fallback to fileStrategy
      },
    ]);

    const processed = await processParts([
      fileSearchCall(`File: test.pdf
File_ID: file123
Relevance: 0.8
Content: Test content`),
    ]);

    const [citation] = firstSources(processed);
    expect(citation.metadata.storageType).toBe('s3'); // Should use fileStrategy
  });

  it('should handle file diversity and allow multiple pages per file', async () => {
    getCustomConfig.mockResolvedValue({
      endpoints: { agents: { maxCitations: 5, maxCitationsPerFile: 3 } },
      fileStrategy: 's3',
    });
    Files.find.mockResolvedValue([
      { file_id: 'file1', source: 'local', filename: 'test1.pdf' },
      { file_id: 'file2', source: 'local', filename: 'test2.pdf' },
    ]);

    const processed = await processParts([
      fileSearchCall(`File: test1.pdf
File_ID: file1
Relevance: 0.9
Page: 1
Content: High relevance content

---

File: test1.pdf
File_ID: file1
Relevance: 0.7
Page: 2
Content: Lower relevance content

---

File: test2.pdf
File_ID: file2
Relevance: 0.8
Page: 1
Content: Different file content`),
    ]);

    const citations = firstSources(processed);
    expect(citations.length).toBeGreaterThanOrEqual(2); // Can include multiple pages per file now

    // Should have both files represented
    const citedFileIds = citations.map(({ fileId }) => fileId);
    expect(citedFileIds).toContain('file1');
    expect(citedFileIds).toContain('file2');

    // Should include multiple pages from file1 due to high relevance
    // NOTE(review): `>= 1` is already implied by the toContain('file1') check above, so this
    // does not actually pin "multiple pages" - confirm the intended lower bound.
    const fromFirstFile = citations.filter(({ fileId }) => fileId === 'file1');
    expect(fromFirstFile.length).toBeGreaterThanOrEqual(1);
  });

  it('should respect maxCitationsPerFile configuration', async () => {
    getCustomConfig.mockResolvedValue({
      endpoints: { agents: { maxCitations: 10, maxCitationsPerFile: 2 } },
      fileStrategy: 'local',
    });
    Files.find.mockResolvedValue([{ file_id: 'file1', source: 'local', filename: 'test1.pdf' }]);

    const processed = await processParts([
      fileSearchCall(`File: test1.pdf
File_ID: file1
Relevance: 0.9
Page: 1
Content: Page 1 content

---

File: test1.pdf
File_ID: file1
Relevance: 0.8
Page: 2
Content: Page 2 content

---

File: test1.pdf
File_ID: file1
Relevance: 0.7
Page: 3
Content: Page 3 content

---

File: test1.pdf
File_ID: file1
Relevance: 0.6
Page: 4
Content: Page 4 content`),
    ]);

    const citations = firstSources(processed);
    expect(citations).toHaveLength(2); // Should be limited to maxCitationsPerFile (2)

    // Should include the 2 highest relevance pages (0.9 and 0.8)
    expect(citations[0].relevance).toBe(0.9);
    expect(citations[1].relevance).toBe(0.8);
  });
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue