LibreChat/api/server/routes/files/files.js
Danny Avila 52e59e40be
📚 feat: Add Source Citations for File Search in Agents (#8652)
* feat: Source Citations for file_search in Agents

* Fix: Added citation limits and relevance score to app service. Removed duplicate tests

*  feat: implement Role-level toggle to optionally disable file Source Citation in Agents

* 🐛 fix: update mock for librechat-data-provider to include PermissionTypes and SystemRoles

---------

Co-authored-by: “Praneeth <praneeth.goparaju@slalom.com>
2025-08-13 16:24:16 -04:00

491 lines
16 KiB
JavaScript

const fs = require('fs').promises;
const express = require('express');
const { EnvVar } = require('@librechat/agents');
const {
Time,
isUUID,
CacheKeys,
Constants,
FileSources,
EModelEndpoint,
isAgentsEndpoint,
checkOpenAIStorage,
} = require('librechat-data-provider');
const {
filterFile,
processFileUpload,
processDeleteRequest,
processAgentFileUpload,
} = require('~/server/services/Files/process');
const { getFiles, batchUpdateFiles, hasAccessToFilesViaAgent } = require('~/models/File');
const { getStrategyFunctions } = require('~/server/services/Files/strategies');
const { getOpenAIClient } = require('~/server/controllers/assistants/helpers');
const { loadAuthValues } = require('~/server/services/Tools/credentials');
const { refreshS3FileUrls } = require('~/server/services/Files/S3/crud');
const { getProjectByName } = require('~/models/Project');
const { getAssistant } = require('~/models/Assistant');
const { getAgent } = require('~/models/Agent');
const { cleanFileName } = require('~/server/utils/files');
const { getLogStores } = require('~/cache');
const { logger } = require('~/config');
/**
* Checks if user has access to shared agent file through agent ownership or permissions
*/
const checkSharedFileAccess = async (userId, fileId) => {
try {
// Find agents that have this file in their tool_resources
const agentsWithFile = await getAgent({
$or: [
{ 'tool_resources.file_search.file_ids': fileId },
{ 'tool_resources.execute_code.file_ids': fileId },
{ 'tool_resources.ocr.file_ids': fileId },
],
});
if (!agentsWithFile || agentsWithFile.length === 0) {
return false;
}
// Check if user has access to any of these agents
for (const agent of Array.isArray(agentsWithFile) ? agentsWithFile : [agentsWithFile]) {
// Check if user is the agent author
if (agent.author && agent.author.toString() === userId) {
return true;
}
// Check if agent is collaborative
if (agent.isCollaborative) {
return true;
}
// Check if user has access through project membership
if (agent.projectIds && agent.projectIds.length > 0) {
// For now, return true if agent has project IDs (simplified check)
// This could be enhanced to check actual project membership
return true;
}
}
return false;
} catch (error) {
logger.error('[checkSharedFileAccess] Error:', error);
return false;
}
};
const router = express.Router();
router.get('/', async (req, res) => {
try {
const files = await getFiles({ user: req.user.id });
if (req.app.locals.fileStrategy === FileSources.s3) {
try {
const cache = getLogStores(CacheKeys.S3_EXPIRY_INTERVAL);
const alreadyChecked = await cache.get(req.user.id);
if (!alreadyChecked) {
await refreshS3FileUrls(files, batchUpdateFiles);
await cache.set(req.user.id, true, Time.THIRTY_MINUTES);
}
} catch (error) {
logger.warn('[/files] Error refreshing S3 file URLs:', error);
}
}
res.status(200).send(files);
} catch (error) {
logger.error('[/files] Error getting files:', error);
res.status(400).json({ message: 'Error in request', error: error.message });
}
});
/**
* Get files specific to an agent
* @route GET /files/agent/:agent_id
* @param {string} agent_id - The agent ID to get files for
* @returns {Promise<TFile[]>} Array of files attached to the agent
*/
router.get('/agent/:agent_id', async (req, res) => {
try {
const { agent_id } = req.params;
const userId = req.user.id;
if (!agent_id) {
return res.status(400).json({ error: 'Agent ID is required' });
}
// Get the agent to check ownership and attached files
const agent = await getAgent({ id: agent_id });
if (!agent) {
// No agent found, return empty array
return res.status(200).json([]);
}
// Check if user has access to the agent
if (agent.author.toString() !== userId) {
// Non-authors need the agent to be globally shared and collaborative
const globalProject = await getProjectByName(Constants.GLOBAL_PROJECT_NAME, '_id');
if (
!globalProject ||
!agent.projectIds.some((pid) => pid.toString() === globalProject._id.toString()) ||
!agent.isCollaborative
) {
return res.status(200).json([]);
}
}
// Collect all file IDs from agent's tool resources
const agentFileIds = [];
if (agent.tool_resources) {
for (const [, resource] of Object.entries(agent.tool_resources)) {
if (resource?.file_ids && Array.isArray(resource.file_ids)) {
agentFileIds.push(...resource.file_ids);
}
}
}
// If no files attached to agent, return empty array
if (agentFileIds.length === 0) {
return res.status(200).json([]);
}
// Get only the files attached to this agent
const files = await getFiles({ file_id: { $in: agentFileIds } }, null, { text: 0 });
res.status(200).json(files);
} catch (error) {
logger.error('[/files/agent/:agent_id] Error fetching agent files:', error);
res.status(500).json({ error: 'Failed to fetch agent files' });
}
});
router.get('/config', async (req, res) => {
try {
res.status(200).json(req.app.locals.fileConfig);
} catch (error) {
logger.error('[/files] Error getting fileConfig', error);
res.status(400).json({ message: 'Error in request', error: error.message });
}
});
router.delete('/', async (req, res) => {
try {
const { files: _files } = req.body;
/** @type {MongoFile[]} */
const files = _files.filter((file) => {
if (!file.file_id) {
return false;
}
if (!file.filepath) {
return false;
}
if (/^(file|assistant)-/.test(file.file_id)) {
return true;
}
return isUUID.safeParse(file.file_id).success;
});
if (files.length === 0) {
res.status(204).json({ message: 'Nothing provided to delete' });
return;
}
const fileIds = files.map((file) => file.file_id);
const dbFiles = await getFiles({ file_id: { $in: fileIds } });
const ownedFiles = [];
const nonOwnedFiles = [];
const fileMap = new Map();
for (const file of dbFiles) {
fileMap.set(file.file_id, file);
if (file.user.toString() === req.user.id) {
ownedFiles.push(file);
} else {
nonOwnedFiles.push(file);
}
}
// If all files are owned by the user, no need for further checks
if (nonOwnedFiles.length === 0) {
await processDeleteRequest({ req, files: ownedFiles });
logger.debug(
`[/files] Files deleted successfully: ${ownedFiles
.filter((f) => f.file_id)
.map((f) => f.file_id)
.join(', ')}`,
);
res.status(200).json({ message: 'Files deleted successfully' });
return;
}
// Check access for non-owned files
let authorizedFiles = [...ownedFiles];
let unauthorizedFiles = [];
if (req.body.agent_id && nonOwnedFiles.length > 0) {
// Batch check access for all non-owned files
const nonOwnedFileIds = nonOwnedFiles.map((f) => f.file_id);
const accessMap = await hasAccessToFilesViaAgent(
req.user.id,
nonOwnedFileIds,
req.body.agent_id,
);
// Separate authorized and unauthorized files
for (const file of nonOwnedFiles) {
if (accessMap.get(file.file_id)) {
authorizedFiles.push(file);
} else {
unauthorizedFiles.push(file);
}
}
} else {
// No agent context, all non-owned files are unauthorized
unauthorizedFiles = nonOwnedFiles;
}
if (unauthorizedFiles.length > 0) {
return res.status(403).json({
message: 'You can only delete files you have access to',
unauthorizedFiles: unauthorizedFiles.map((f) => f.file_id),
});
}
/* Handle agent unlinking even if no valid files to delete */
if (req.body.agent_id && req.body.tool_resource && dbFiles.length === 0) {
const agent = await getAgent({
id: req.body.agent_id,
});
const toolResourceFiles = agent.tool_resources?.[req.body.tool_resource]?.file_ids ?? [];
const agentFiles = files.filter((f) => toolResourceFiles.includes(f.file_id));
await processDeleteRequest({ req, files: agentFiles });
res.status(200).json({ message: 'File associations removed successfully from agent' });
return;
}
/* Handle assistant unlinking even if no valid files to delete */
if (req.body.assistant_id && req.body.tool_resource && dbFiles.length === 0) {
const assistant = await getAssistant({
id: req.body.assistant_id,
});
const toolResourceFiles = assistant.tool_resources?.[req.body.tool_resource]?.file_ids ?? [];
const assistantFiles = files.filter((f) => toolResourceFiles.includes(f.file_id));
await processDeleteRequest({ req, files: assistantFiles });
res.status(200).json({ message: 'File associations removed successfully from assistant' });
return;
} else if (
req.body.assistant_id &&
req.body.files?.[0]?.filepath === EModelEndpoint.azureAssistants
) {
await processDeleteRequest({ req, files: req.body.files });
return res
.status(200)
.json({ message: 'File associations removed successfully from Azure Assistant' });
}
await processDeleteRequest({ req, files: authorizedFiles });
logger.debug(
`[/files] Files deleted successfully: ${authorizedFiles
.filter((f) => f.file_id)
.map((f) => f.file_id)
.join(', ')}`,
);
res.status(200).json({ message: 'Files deleted successfully' });
} catch (error) {
logger.error('[/files] Error deleting files:', error);
res.status(400).json({ message: 'Error in request', error: error.message });
}
});
function isValidID(str) {
return /^[A-Za-z0-9_-]{21}$/.test(str);
}
router.get('/code/download/:session_id/:fileId', async (req, res) => {
try {
const { session_id, fileId } = req.params;
const logPrefix = `Session ID: ${session_id} | File ID: ${fileId} | Code output download requested by user `;
logger.debug(logPrefix);
if (!session_id || !fileId) {
return res.status(400).send('Bad request');
}
if (!isValidID(session_id) || !isValidID(fileId)) {
logger.debug(`${logPrefix} invalid session_id or fileId`);
return res.status(400).send('Bad request');
}
const { getDownloadStream } = getStrategyFunctions(FileSources.execute_code);
if (!getDownloadStream) {
logger.warn(
`${logPrefix} has no stream method implemented for ${FileSources.execute_code} source`,
);
return res.status(501).send('Not Implemented');
}
const result = await loadAuthValues({ userId: req.user.id, authFields: [EnvVar.CODE_API_KEY] });
/** @type {AxiosResponse<ReadableStream> | undefined} */
const response = await getDownloadStream(
`${session_id}/${fileId}`,
result[EnvVar.CODE_API_KEY],
);
res.set(response.headers);
response.data.pipe(res);
} catch (error) {
logger.error('Error downloading file:', error);
res.status(500).send('Error downloading file');
}
});
router.get('/download/:userId/:file_id', async (req, res) => {
try {
const { userId, file_id } = req.params;
logger.debug(`File download requested by user ${userId}: ${file_id}`);
const errorPrefix = `File download requested by user ${userId}`;
const [file] = await getFiles({ file_id });
if (!file) {
logger.warn(`${errorPrefix} not found: ${file_id}`);
return res.status(404).send('File not found');
}
// Extract actual file owner from S3 filepath (e.g., /uploads/ownerId/filename)
let actualFileOwner = userId;
if (file.filepath && file.filepath.includes('/uploads/')) {
const pathMatch = file.filepath.match(/\/uploads\/([^/]+)\//);
if (pathMatch) {
actualFileOwner = pathMatch[1];
}
}
// Check access: either own the file or have shared access through conversations
const isFileOwner = req.user.id === actualFileOwner;
const hasSharedAccess = !isFileOwner && (await checkSharedFileAccess(req.user.id, file_id));
if (!isFileOwner && !hasSharedAccess) {
return res.status(403).send('Forbidden');
}
if (isFileOwner && userId !== actualFileOwner) {
return res.status(403).send('Forbidden');
}
if (checkOpenAIStorage(file.source) && !file.model) {
logger.warn(`${errorPrefix} has no associated model: ${file_id}`);
return res.status(400).send('The model used when creating this file is not available');
}
const { getDownloadStream } = getStrategyFunctions(file.source);
if (!getDownloadStream) {
logger.warn(`${errorPrefix} has no stream method implemented: ${file.source}`);
return res.status(501).send('Not Implemented');
}
const setHeaders = () => {
const cleanedFilename = cleanFileName(file.filename);
res.setHeader('Content-Disposition', `attachment; filename="${cleanedFilename}"`);
res.setHeader('Content-Type', 'application/octet-stream');
res.setHeader('X-File-Metadata', JSON.stringify(file));
};
/** @type {{ body: import('stream').PassThrough } | undefined} */
let passThrough;
/** @type {ReadableStream | undefined} */
let fileStream;
if (checkOpenAIStorage(file.source)) {
req.body = { model: file.model };
const endpointMap = {
[FileSources.openai]: EModelEndpoint.assistants,
[FileSources.azure]: EModelEndpoint.azureAssistants,
};
const { openai } = await getOpenAIClient({
req,
res,
overrideEndpoint: endpointMap[file.source],
});
logger.debug(`Downloading file ${file_id} from OpenAI`);
passThrough = await getDownloadStream(file_id, openai);
setHeaders();
logger.debug(`File ${file_id} downloaded from OpenAI`);
passThrough.body.pipe(res);
} else {
fileStream = await getDownloadStream(req, file.filepath);
fileStream.on('error', (streamError) => {
logger.error('[DOWNLOAD ROUTE] Stream error:', streamError);
});
setHeaders();
fileStream.pipe(res);
}
} catch (error) {
logger.error('[DOWNLOAD ROUTE] Error downloading file:', error);
res.status(500).send('Error downloading file');
}
});
router.post('/', async (req, res) => {
const metadata = req.body;
let cleanup = true;
try {
filterFile({ req });
metadata.temp_file_id = metadata.file_id;
metadata.file_id = req.file_id;
if (isAgentsEndpoint(metadata.endpoint)) {
return await processAgentFileUpload({ req, res, metadata });
}
await processFileUpload({ req, res, metadata });
} catch (error) {
let message = 'Error processing file';
logger.error('[/files] Error processing file:', error);
if (error.message?.includes('file_ids')) {
message += ': ' + error.message;
}
if (
error.message?.includes('Invalid file format') ||
error.message?.includes('No OCR result')
) {
message = error.message;
}
try {
await fs.unlink(req.file.path);
cleanup = false;
} catch (error) {
logger.error('[/files] Error deleting file:', error);
}
res.status(500).json({ message });
} finally {
if (cleanup) {
try {
await fs.unlink(req.file.path);
} catch (error) {
logger.error('[/files] Error deleting file after file processing:', error);
}
} else {
logger.debug('[/files] File processing completed without cleanup');
}
}
});
module.exports = router;