Mirror of https://github.com/danny-avila/LibreChat.git (synced 2025-12-16 08:20:14 +01:00)
* refactor: move endpoint initialization methods to typescript
* refactor: move agent init to packages/api
  - Introduced `initialize.ts` for agent initialization, including file processing and tool loading.
  - Updated `resources.ts` to allow optional appConfig parameter.
  - Enhanced endpoint configuration handling in various initialization files to support model parameters.
  - Added new artifacts and prompts for React component generation.
  - Refactored existing code to improve type safety and maintainability.
* refactor: streamline endpoint initialization and enhance type safety
  - Updated initialization functions across various endpoints to use a consistent request structure, replacing `unknown` types with `ServerResponse`.
  - Simplified request handling by directly extracting keys from the request body.
  - Improved type safety by ensuring user IDs are safely accessed with optional chaining.
  - Removed unnecessary parameters and streamlined model options handling for better clarity and maintainability.
* refactor: moved ModelService and extractBaseURL to packages/api
  - Added comprehensive tests for the models fetching functionality, covering scenarios for OpenAI, Anthropic, Google, and Ollama models.
  - Updated existing endpoint index to include the new models module.
  - Enhanced utility functions for URL extraction and model data processing.
  - Improved type safety and error handling across the models fetching logic.
* refactor: consolidate utility functions and remove unused files
  - Merged `deriveBaseURL` and `extractBaseURL` into the `@librechat/api` module for better organization.
  - Removed redundant utility files and their associated tests to streamline the codebase.
  - Updated imports across various client files to utilize the new consolidated functions.
  - Enhanced overall maintainability by reducing the number of utility modules.
* refactor: replace ModelService references with direct imports from @librechat/api and remove ModelService file
* refactor: move encrypt/decrypt methods and key db methods to data-schemas, use `getProviderConfig` from `@librechat/api`
* chore: remove unused 'res' from options in AgentClient
* refactor: file model imports and methods
  - Updated imports in various controllers and services to use the unified file model from '~/models' instead of '~/models/File'.
  - Consolidated file-related methods into a new file methods module in the data-schemas package.
  - Added comprehensive tests for file methods including creation, retrieval, updating, and deletion.
  - Enhanced the initializeAgent function to accept dependency injection for file-related methods (see the sketch below).
  - Improved error handling and logging in file methods.
* refactor: streamline database method references in agent initialization
* refactor: enhance file method tests and update type references to IMongoFile
* refactor: consolidate database method imports in agent client and initialization
* chore: remove redundant import of initializeAgent from @librechat/api
* refactor: move checkUserKeyExpiry utility to @librechat/api and update references across endpoints
* refactor: move updateUserPlugins logic to user.ts and simplify UserController
* refactor: update imports for user key management and remove UserService
* refactor: remove unused Anthropics and Bedrock endpoint files and clean up imports
* refactor: consolidate and update encryption imports across various files to use @librechat/data-schemas
* chore: update file model mock to use unified import from '~/models'
* chore: import order
* refactor: remove migrated to TS agent.js file and its associated logic from the endpoints
* chore: add reusable function to extract imports from source code in unused-packages workflow
* chore: enhance unused-packages workflow to include @librechat/api dependencies and improve dependency extraction
* chore: improve dependency extraction in unused-packages workflow with enhanced error handling and debugging output
* chore: add detailed debugging output to unused-packages workflow for better visibility into unused dependencies and exclusion lists
* chore: refine subpath handling in unused-packages workflow to correctly process scoped and non-scoped package imports
* chore: clean up unused debug output in unused-packages workflow and reorganize type imports in initialize.ts
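One item above mentions passing file-related database methods into `initializeAgent` rather than importing them inside `@librechat/api`. A minimal sketch of that dependency-injection shape, assuming a hypothetical call site (the helper name, options object, and parameter list are illustrative, not the actual `initialize.ts` signature):

// Hypothetical call site; only the idea of injecting file methods comes from the
// commit message above. Names and the parameter shape are assumptions.
const { initializeAgent } = require('@librechat/api');
const { getFiles } = require('~/models');

async function loadAgentForRequest(req, res, loadedAgent) {
  return initializeAgent({
    req,
    res,
    agent: loadedAgent,
    // Injecting the DB method keeps `@librechat/api` decoupled from the Mongoose models
    getFiles,
  });
}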
196 lines
7.1 KiB
JavaScript
const { z } = require('zod');
const axios = require('axios');
const { tool } = require('@langchain/core/tools');
const { logger } = require('@librechat/data-schemas');
const { generateShortLivedToken } = require('@librechat/api');
const { Tools, EToolResources } = require('librechat-data-provider');
const { filterFilesByAgentAccess } = require('~/server/services/Files/permissions');
const { getFiles } = require('~/models');

/**
 * Loads the agent's `file_search` resources, filters them by agent-level access,
 * and builds the tool context string listing the searchable filenames.
 * @param {Object} options
 * @param {ServerRequest} options.req
 * @param {Agent['tool_resources']} options.tool_resources
 * @param {string} [options.agentId] - The agent ID for file access control
 * @returns {Promise<{
 *   files: Array<{ file_id: string; filename: string }>,
 *   toolContext: string
 * }>}
 */
const primeFiles = async (options) => {
  const { tool_resources, req, agentId } = options;
  const file_ids = tool_resources?.[EToolResources.file_search]?.file_ids ?? [];
  const agentResourceIds = new Set(file_ids);
  const resourceFiles = tool_resources?.[EToolResources.file_search]?.files ?? [];

  // Get all files first
  const allFiles = (await getFiles({ file_id: { $in: file_ids } }, null, { text: 0 })) ?? [];

  // Filter by access if user and agent are provided
  let dbFiles;
  if (req?.user?.id && agentId) {
    dbFiles = await filterFilesByAgentAccess({
      files: allFiles,
      userId: req.user.id,
      role: req.user.role,
      agentId,
    });
  } else {
    dbFiles = allFiles;
  }

  dbFiles = dbFiles.concat(resourceFiles);

  let toolContext = `- Note: Semantic search is available through the ${Tools.file_search} tool but no files are currently loaded. Request the user to upload documents to search through.`;

  const files = [];
  for (let i = 0; i < dbFiles.length; i++) {
    const file = dbFiles[i];
    if (!file) {
      continue;
    }
    if (i === 0) {
      toolContext = `- Note: Use the ${Tools.file_search} tool to find relevant information within:`;
    }
    toolContext += `\n\t- ${file.filename}${
      agentResourceIds.has(file.file_id) ? '' : ' (just attached by user)'
    }`;
    files.push({
      file_id: file.file_id,
      filename: file.filename,
    });
  }

  return { files, toolContext };
};

/**
 * Creates the `file_search` LangChain tool, which queries the RAG API for each
 * provided file and returns the highest-ranked passages, optionally with citation anchors.
 * @param {Object} options
 * @param {string} options.userId
 * @param {Array<{ file_id: string; filename: string }>} options.files
 * @param {string} [options.entity_id]
 * @param {boolean} [options.fileCitations=false] - Whether to include citation instructions
 * @returns The configured `file_search` tool instance
 */
const createFileSearchTool = async ({ userId, files, entity_id, fileCitations = false }) => {
  return tool(
    async ({ query }) => {
      if (files.length === 0) {
        return ['No files to search. Instruct the user to add files for the search.', undefined];
      }
      const jwtToken = generateShortLivedToken(userId);
      if (!jwtToken) {
        return ['There was an error authenticating the file search request.', undefined];
      }

      /**
       * @param {import('librechat-data-provider').TFile} file
       * @returns {{ file_id: string, query: string, k: number, entity_id?: string }}
       */
      const createQueryBody = (file) => {
        const body = {
          file_id: file.file_id,
          query,
          k: 5,
        };
        if (!entity_id) {
          return body;
        }
        body.entity_id = entity_id;
        logger.debug(`[${Tools.file_search}] RAG API /query body`, body);
        return body;
      };

      const queryPromises = files.map((file) =>
        axios
          .post(`${process.env.RAG_API_URL}/query`, createQueryBody(file), {
            headers: {
              Authorization: `Bearer ${jwtToken}`,
              'Content-Type': 'application/json',
            },
          })
          .catch((error) => {
            logger.error('Error encountered in `file_search` while querying file:', error);
            return null;
          }),
      );

      const results = await Promise.all(queryPromises);
      const validResults = results.filter((result) => result !== null);

      if (validResults.length === 0) {
        return ['No results found or errors occurred while searching the files.', undefined];
      }

      const formattedResults = validResults
        .flatMap((result, fileIndex) =>
          result.data.map(([docInfo, distance]) => ({
            filename: docInfo.metadata.source.split('/').pop(),
            content: docInfo.page_content,
            distance,
            file_id: files[fileIndex]?.file_id,
            page: docInfo.metadata.page || null,
          })),
        )
        .sort((a, b) => a.distance - b.distance)
        .slice(0, 10);

      if (formattedResults.length === 0) {
        return [
          'No content found in the files. The files may not have been processed correctly or you may need to refine your query.',
          undefined,
        ];
      }

      const formattedString = formattedResults
        .map(
          (result, index) =>
            `File: ${result.filename}${
              fileCitations ? `\nAnchor: \\ue202turn0file${index} (${result.filename})` : ''
            }\nRelevance: ${(1.0 - result.distance).toFixed(4)}\nContent: ${result.content}\n`,
        )
        .join('\n---\n');

      const sources = formattedResults.map((result) => ({
        type: 'file',
        fileId: result.file_id,
        content: result.content,
        fileName: result.filename,
        relevance: 1.0 - result.distance,
        pages: result.page ? [result.page] : [],
        pageRelevance: result.page ? { [result.page]: 1.0 - result.distance } : {},
      }));

      return [formattedString, { [Tools.file_search]: { sources, fileCitations } }];
    },
    {
      name: Tools.file_search,
      responseFormat: 'content_and_artifact',
      description: `Performs semantic search across attached "${Tools.file_search}" documents using natural language queries. This tool analyzes the content of uploaded files to find relevant information, quotes, and passages that best match your query. Use this to extract specific information or find relevant sections within the available documents.${
        fileCitations
          ? `

**CITE FILE SEARCH RESULTS:**
Use the EXACT anchor markers shown below (copy them verbatim) immediately after statements derived from file content. Reference the filename in your text:
- File citation: "The document.pdf states that... \\ue202turn0file0"
- Page reference: "According to report.docx... \\ue202turn0file1"
- Multi-file: "Multiple sources confirm... \\ue200\\ue202turn0file0\\ue202turn0file1\\ue201"

**CRITICAL:** Output these escape sequences EXACTLY as shown (e.g., \\ue202turn0file0). Do NOT substitute with other characters like † or similar symbols.
**ALWAYS mention the filename in your text before the citation marker. NEVER use markdown links or footnotes.**`
          : ''
      }`,
      schema: z.object({
        query: z
          .string()
          .describe(
            "A natural language query to search for relevant information in the files. Be specific and use keywords related to the information you're looking for. The query will be used for semantic similarity matching against the file contents.",
          ),
      }),
    },
  );
};

module.exports = { createFileSearchTool, primeFiles };
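For orientation, a minimal sketch of how the two exports above could be wired together, assuming an Express-style `req`, an agent object carrying `tool_resources`, and an illustrative require path (the surrounding names are assumptions, not taken from the repository):

// Illustrative wiring only; `agent`, `req`, and the require path are assumptions.
const { primeFiles, createFileSearchTool } = require('./fileSearch');

async function buildFileSearch(req, agent) {
  // Resolve which files the agent may search and build the tool context string
  const { files, toolContext } = await primeFiles({
    req,
    tool_resources: agent.tool_resources,
    agentId: agent.id,
  });

  // Create the LangChain tool; each query is POSTed to `${process.env.RAG_API_URL}/query`
  const fileSearch = await createFileSearchTool({
    userId: req.user.id,
    files,
    entity_id: agent.id,
    fileCitations: true,
  });

  // `toolContext` is typically appended to the agent's instructions so the model
  // knows which filenames the file_search tool can see.
  return { fileSearch, toolContext };
}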